import pandas as pd
df = pd.read_csv('city.csv', sep=';')
df
ID | Name | CountryCode | District | Population | |
---|---|---|---|---|---|
0 | 1 | Kabul | AFG | Kabol | 1780000 |
1 | 2 | Qandahar | AFG | Qandahar | 237500 |
2 | 3 | Herat | AFG | Herat | 186800 |
3 | 4 | Mazar-e-Sharif | AFG | Balkh | 127800 |
4 | 5 | Amsterdam | NLD | Noord-Holland | 731200 |
... | ... | ... | ... | ... | ... |
4074 | 4075 | Khan Yunis | PSE | Khan Yunis | 123175 |
4075 | 4076 | Hebron | PSE | Hebron | 119401 |
4076 | 4077 | Jabaliya | PSE | North Gaza | 113901 |
4077 | 4078 | Nablus | PSE | Nablus | 100231 |
4078 | 4079 | Rafah | PSE | Rafah | 92020 |
4079 rows × 5 columns
df['cnt'] = 5
df
ID | Name | CountryCode | District | Population | cnt | |
---|---|---|---|---|---|---|
0 | 1 | Kabul | AFG | Kabol | 1780000 | 5 |
1 | 2 | Qandahar | AFG | Qandahar | 237500 | 5 |
2 | 3 | Herat | AFG | Herat | 186800 | 5 |
3 | 4 | Mazar-e-Sharif | AFG | Balkh | 127800 | 5 |
4 | 5 | Amsterdam | NLD | Noord-Holland | 731200 | 5 |
... | ... | ... | ... | ... | ... | ... |
4074 | 4075 | Khan Yunis | PSE | Khan Yunis | 123175 | 5 |
4075 | 4076 | Hebron | PSE | Hebron | 119401 | 5 |
4076 | 4077 | Jabaliya | PSE | North Gaza | 113901 | 5 |
4077 | 4078 | Nablus | PSE | Nablus | 100231 | 5 |
4078 | 4079 | Rafah | PSE | Rafah | 92020 | 5 |
4079 rows × 6 columns
# Deliberately incorrect: `df.cnt > 100` is a boolean Series, and a Series has
# no single truth value, so this conditional expression raises ValueError
# instead of being evaluated element by element.
df['cnt'] = 1 if df.cnt > 100 else 2
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-5-876ae7c33d2f> in <module> ----> 1 df['cnt'] = 1 if df.cnt > 100 else 2 C:\anaconda3\lib\site-packages\pandas\core\generic.py in __nonzero__(self) 1476 1477 def __nonzero__(self): -> 1478 raise ValueError( 1479 f"The truth value of a {type(self).__name__} is ambiguous. " 1480 "Use a.empty, a.bool(), a.item(), a.any() or a.all()." ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
# Evaluate the condition once per element with apply; bracket assignment is
# used (as in the other cells) because attribute-style assignment only works
# for a column that already exists.
df['cnt'] = df['cnt'].apply(lambda value: 1 if value > 100 else 2)
df
ID | Name | CountryCode | District | Population | cnt | |
---|---|---|---|---|---|---|
0 | 1 | Kabul | AFG | Kabol | 1780000 | 2 |
1 | 2 | Qandahar | AFG | Qandahar | 237500 | 2 |
2 | 3 | Herat | AFG | Herat | 186800 | 2 |
3 | 4 | Mazar-e-Sharif | AFG | Balkh | 127800 | 2 |
4 | 5 | Amsterdam | NLD | Noord-Holland | 731200 | 2 |
... | ... | ... | ... | ... | ... | ... |
4074 | 4075 | Khan Yunis | PSE | Khan Yunis | 123175 | 2 |
4075 | 4076 | Hebron | PSE | Hebron | 119401 | 2 |
4076 | 4077 | Jabaliya | PSE | North Gaza | 113901 | 2 |
4077 | 4078 | Nablus | PSE | Nablus | 100231 | 2 |
4078 | 4079 | Rafah | PSE | Rafah | 92020 | 2 |
4079 rows × 6 columns
df.drop(columns='cnt', inplace=True)
df
ID | Name | CountryCode | District | Population | |
---|---|---|---|---|---|
0 | 1 | Kabul | AFG | Kabol | 1780000 |
1 | 2 | Qandahar | AFG | Qandahar | 237500 |
2 | 3 | Herat | AFG | Herat | 186800 |
3 | 4 | Mazar-e-Sharif | AFG | Balkh | 127800 |
4 | 5 | Amsterdam | NLD | Noord-Holland | 731200 |
... | ... | ... | ... | ... | ... |
4074 | 4075 | Khan Yunis | PSE | Khan Yunis | 123175 |
4075 | 4076 | Hebron | PSE | Hebron | 119401 |
4076 | 4077 | Jabaliya | PSE | North Gaza | 113901 |
4077 | 4078 | Nablus | PSE | Nablus | 100231 |
4078 | 4079 | Rafah | PSE | Rafah | 92020 |
4079 rows × 5 columns
df['cnt'] = df.Name.apply(len)
df
ID | Name | CountryCode | District | Population | cnt | |
---|---|---|---|---|---|---|
0 | 1 | Kabul | AFG | Kabol | 1780000 | 5 |
1 | 2 | Qandahar | AFG | Qandahar | 237500 | 8 |
2 | 3 | Herat | AFG | Herat | 186800 | 5 |
3 | 4 | Mazar-e-Sharif | AFG | Balkh | 127800 | 14 |
4 | 5 | Amsterdam | NLD | Noord-Holland | 731200 | 9 |
... | ... | ... | ... | ... | ... | ... |
4074 | 4075 | Khan Yunis | PSE | Khan Yunis | 123175 | 10 |
4075 | 4076 | Hebron | PSE | Hebron | 119401 | 6 |
4076 | 4077 | Jabaliya | PSE | North Gaza | 113901 | 8 |
4077 | 4078 | Nablus | PSE | Nablus | 100231 | 6 |
4078 | 4079 | Rafah | PSE | Rafah | 92020 | 5 |
4079 rows × 6 columns
def get_size(param):
    """Classify a population figure into a city-size label.

    Args:
        param: Population count; any value comparable with integers.

    Returns:
        'megacity' for values above 10,000,000, 'large' for values above
        1,000,000, 'medium' for values above 500,000, and 'small' otherwise.
    """
    # Comparisons are strict, so a value sitting exactly on a threshold falls
    # into the lower category.
    if param > 10000000:
        return 'megacity'
    if param > 1000000:
        return 'large'
    if param > 500000:
        return 'medium'
    return 'small'
df['Size'] = df.Population.apply(get_size)
df
ID | Name | CountryCode | District | Population | cnt | Size | |
---|---|---|---|---|---|---|---|
0 | 1 | Kabul | AFG | Kabol | 1780000 | 5 | large |
1 | 2 | Qandahar | AFG | Qandahar | 237500 | 8 | small |
2 | 3 | Herat | AFG | Herat | 186800 | 5 | small |
3 | 4 | Mazar-e-Sharif | AFG | Balkh | 127800 | 14 | small |
4 | 5 | Amsterdam | NLD | Noord-Holland | 731200 | 9 | medium |
... | ... | ... | ... | ... | ... | ... | ... |
4074 | 4075 | Khan Yunis | PSE | Khan Yunis | 123175 | 10 | small |
4075 | 4076 | Hebron | PSE | Hebron | 119401 | 6 | small |
4076 | 4077 | Jabaliya | PSE | North Gaza | 113901 | 8 | small |
4077 | 4078 | Nablus | PSE | Nablus | 100231 | 6 | small |
4078 | 4079 | Rafah | PSE | Rafah | 92020 | 5 | small |
4079 rows × 7 columns
df.Size.value_counts()
small 3540 medium 302 large 236 megacity 1 Name: Size, dtype: int64
df[df.Size == 'megacity']
ID | Name | CountryCode | District | Population | cnt | Size | |
---|---|---|---|---|---|---|---|
1023 | 1024 | Mumbai (Bombay) | IND | Maharashtra | 10500000 | 15 | megacity |
# Same thresholds as get_size, expressed as one chained conditional expression.
df['Size2'] = df.Population.apply(
    lambda population: (
        'megacity' if population > 10000000
        else 'large' if population > 1000000
        else 'medium' if population > 500000
        else 'small'
    )
)
df
ID | Name | CountryCode | District | Population | cnt | Size | Size2 | |
---|---|---|---|---|---|---|---|---|
0 | 1 | Kabul | AFG | Kabol | 1780000 | 5 | large | large |
1 | 2 | Qandahar | AFG | Qandahar | 237500 | 8 | small | small |
2 | 3 | Herat | AFG | Herat | 186800 | 5 | small | small |
3 | 4 | Mazar-e-Sharif | AFG | Balkh | 127800 | 14 | small | small |
4 | 5 | Amsterdam | NLD | Noord-Holland | 731200 | 9 | medium | medium |
... | ... | ... | ... | ... | ... | ... | ... | ... |
4074 | 4075 | Khan Yunis | PSE | Khan Yunis | 123175 | 10 | small | small |
4075 | 4076 | Hebron | PSE | Hebron | 119401 | 6 | small | small |
4076 | 4077 | Jabaliya | PSE | North Gaza | 113901 | 8 | small | small |
4077 | 4078 | Nablus | PSE | Nablus | 100231 | 6 | small | small |
4078 | 4079 | Rafah | PSE | Rafah | 92020 | 5 | small | small |
4079 rows × 8 columns
df.Size2.value_counts()
small 3540 medium 302 large 236 megacity 1 Name: Size2, dtype: int64
df_country = pd.read_csv('country.csv', sep=';')
df_country
Code | Name | Continent | Region | SurfaceArea | IndepYear | Population | LifeExpectancy | GNP | GNPOld | LocalName | GovernmentForm | HeadOfState | Capital | Code2 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | ABW | Aruba | North America | Caribbean | 193.0 | NaN | 103000 | 78.4 | 828.0 | 793.0 | Aruba | Nonmetropolitan Territory of The Netherlands | Beatrix | 129.0 | AW |
1 | AFG | Afghanistan | Asia | Southern and Central Asia | 652090.0 | 1919.0 | 22720000 | 45.9 | 5976.0 | NaN | Afganistan/Afqanestan | Islamic Emirate | Mohammad Omar | 1.0 | AF |
2 | AGO | Angola | Africa | Central Africa | 1246700.0 | 1975.0 | 12878000 | 38.3 | 6648.0 | 7984.0 | Angola | Republic | José Eduardo dos Santos | 56.0 | AO |
3 | AIA | Anguilla | North America | Caribbean | 96.0 | NaN | 8000 | 76.1 | 63.2 | NaN | Anguilla | Dependent Territory of the UK | Elisabeth II | 62.0 | AI |
4 | ALB | Albania | Europe | Southern Europe | 28748.0 | 1912.0 | 3401200 | 71.6 | 3205.0 | 2500.0 | Shqipëria | Republic | Rexhep Mejdani | 34.0 | AL |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
234 | YEM | Yemen | Asia | Middle East | 527968.0 | 1918.0 | 18112000 | 59.8 | 6041.0 | 5729.0 | Al-Yaman | Republic | Ali Abdallah Salih | 1780.0 | YE |
235 | YUG | Yugoslavia | Europe | Southern Europe | 102173.0 | 1918.0 | 10640000 | 72.4 | 17000.0 | NaN | Jugoslavija | Federal Republic | Vojislav Koštunica | 1792.0 | YU |
236 | ZAF | South Africa | Africa | Southern Africa | 1221037.0 | 1910.0 | 40377000 | 51.1 | 116729.0 | 129092.0 | South Africa | Republic | Thabo Mbeki | 716.0 | ZA |
237 | ZMB | Zambia | Africa | Eastern Africa | 752618.0 | 1964.0 | 9169000 | 37.2 | 3377.0 | 3922.0 | Zambia | Republic | Frederick Chiluba | 3162.0 | ZM |
238 | ZWE | Zimbabwe | Africa | Eastern Africa | 390757.0 | 1980.0 | 11669000 | 37.8 | 5951.0 | 8670.0 | Zimbabwe | Republic | Robert G. Mugabe | 4068.0 | ZW |
239 rows × 15 columns
def get_density(row):
    """Return the population density of one record as a whole number.

    Args:
        row: Mapping-like record (for example a DataFrame row passed by
            ``apply(..., axis=1)``) exposing 'Population' and 'SurfaceArea'.

    Returns:
        Population divided by surface area, truncated toward zero by ``int``
        (so any ratio below 1 becomes 0).
    """
    return int(row['Population'] / row['SurfaceArea'])
df_country['Density'] = df_country.apply(get_density, axis=1)
df_country
Code | Name | Continent | Region | SurfaceArea | IndepYear | Population | LifeExpectancy | GNP | GNPOld | LocalName | GovernmentForm | HeadOfState | Capital | Code2 | Density | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | ABW | Aruba | North America | Caribbean | 193.0 | NaN | 103000 | 78.4 | 828.0 | 793.0 | Aruba | Nonmetropolitan Territory of The Netherlands | Beatrix | 129.0 | AW | 533 |
1 | AFG | Afghanistan | Asia | Southern and Central Asia | 652090.0 | 1919.0 | 22720000 | 45.9 | 5976.0 | NaN | Afganistan/Afqanestan | Islamic Emirate | Mohammad Omar | 1.0 | AF | 34 |
2 | AGO | Angola | Africa | Central Africa | 1246700.0 | 1975.0 | 12878000 | 38.3 | 6648.0 | 7984.0 | Angola | Republic | José Eduardo dos Santos | 56.0 | AO | 10 |
3 | AIA | Anguilla | North America | Caribbean | 96.0 | NaN | 8000 | 76.1 | 63.2 | NaN | Anguilla | Dependent Territory of the UK | Elisabeth II | 62.0 | AI | 83 |
4 | ALB | Albania | Europe | Southern Europe | 28748.0 | 1912.0 | 3401200 | 71.6 | 3205.0 | 2500.0 | Shqipëria | Republic | Rexhep Mejdani | 34.0 | AL | 118 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
234 | YEM | Yemen | Asia | Middle East | 527968.0 | 1918.0 | 18112000 | 59.8 | 6041.0 | 5729.0 | Al-Yaman | Republic | Ali Abdallah Salih | 1780.0 | YE | 34 |
235 | YUG | Yugoslavia | Europe | Southern Europe | 102173.0 | 1918.0 | 10640000 | 72.4 | 17000.0 | NaN | Jugoslavija | Federal Republic | Vojislav Koštunica | 1792.0 | YU | 104 |
236 | ZAF | South Africa | Africa | Southern Africa | 1221037.0 | 1910.0 | 40377000 | 51.1 | 116729.0 | 129092.0 | South Africa | Republic | Thabo Mbeki | 716.0 | ZA | 33 |
237 | ZMB | Zambia | Africa | Eastern Africa | 752618.0 | 1964.0 | 9169000 | 37.2 | 3377.0 | 3922.0 | Zambia | Republic | Frederick Chiluba | 3162.0 | ZM | 12 |
238 | ZWE | Zimbabwe | Africa | Eastern Africa | 390757.0 | 1980.0 | 11669000 | 37.8 | 5951.0 | 8670.0 | Zimbabwe | Republic | Robert G. Mugabe | 4068.0 | ZW | 29 |
239 rows × 16 columns
# Walk the frame row by row and print "index | name | density" for each country.
for index, row in df_country.iterrows():
    print(index, row.Name, row.Density, sep=' | ')
0 | Aruba | 533 1 | Afghanistan | 34 2 | Angola | 10 3 | Anguilla | 83 4 | Albania | 118 5 | Andorra | 166 6 | Netherlands Antilles | 271 7 | United Arab Emirates | 29 8 | Argentina | 13 9 | Armenia | 118 10 | American Samoa | 341 11 | Antarctica | 0 12 | French Southern territories | 0 13 | Antigua and Barbuda | 153 14 | Australia | 2 15 | Austria | 96 16 | Azerbaijan | 89 17 | Burundi | 240 18 | Belgium | 335 19 | Benin | 54 20 | Burkina Faso | 43 21 | Bangladesh | 896 22 | Bulgaria | 73 23 | Bahrain | 889 24 | Bahamas | 22 25 | Bosnia and Herzegovina | 77 26 | Belarus | 49 27 | Belize | 10 28 | Bermuda | 1226 29 | Bolivia | 7 30 | Brazil | 19 31 | Barbados | 627 32 | Brunei | 56 33 | Bhutan | 45 34 | Bouvet Island | 0 35 | Botswana | 2 36 | Central African Republic | 5 37 | Canada | 3 38 | Cocos (Keeling) Islands | 42 39 | Switzerland | 173 40 | Chile | 20 41 | China | 133 42 | Côte d’Ivoire | 45 43 | Cameroon | 31 44 | Congo, The Democratic Republic of the | 22 45 | Congo | 8 46 | Cook Islands | 84 47 | Colombia | 37 48 | Comoros | 310 49 | Cape Verde | 106 50 | Costa Rica | 78 51 | Cuba | 101 52 | Christmas Island | 18 53 | Cayman Islands | 143 54 | Cyprus | 81 55 | Czech Republic | 130 56 | Germany | 230 57 | Djibouti | 27 58 | Dominica | 94 59 | Denmark | 123 60 | Dominican Republic | 175 61 | Algeria | 13 62 | Ecuador | 44 63 | Egypt | 68 64 | Eritrea | 32 65 | Western Sahara | 1 66 | Spain | 77 67 | Estonia | 31 68 | Ethiopia | 56 69 | Finland | 15 70 | Fiji Islands | 44 71 | Falkland Islands | 0 72 | France | 107 73 | Faroe Islands | 30 74 | Micronesia, Federated States of | 169 75 | Gabon | 4 76 | United Kingdom | 245 77 | Georgia | 71 78 | Ghana | 84 79 | Gibraltar | 4166 80 | Guinea | 30 81 | Guadeloupe | 267 82 | Gambia | 115 83 | Guinea-Bissau | 33 84 | Equatorial Guinea | 16 85 | Greece | 80 86 | Grenada | 273 87 | Greenland | 0 88 | Guatemala | 104 89 | French Guiana | 2 90 | Guam | 306 91 | Guyana | 4 92 | Hong Kong | 6308 93 | Heard Island and 
McDonald Islands | 0 94 | Honduras | 57 95 | Croatia | 79 96 | Haiti | 296 97 | Hungary | 107 98 | Indonesia | 111 99 | India | 308 100 | British Indian Ocean Territory | 0 101 | Ireland | 53 102 | Iran | 41 103 | Iraq | 52 104 | Iceland | 2 105 | Israel | 295 106 | Italy | 191 107 | Jamaica | 235 108 | Jordan | 57 109 | Japan | 335 110 | Kazakstan | 5 111 | Kenya | 51 112 | Kyrgyzstan | 23 113 | Cambodia | 61 114 | Kiribati | 114 115 | Saint Kitts and Nevis | 145 116 | South Korea | 471 117 | Kuwait | 110 118 | Laos | 22 119 | Lebanon | 315 120 | Liberia | 28 121 | Libyan Arab Jamahiriya | 3 122 | Saint Lucia | 247 123 | Liechtenstein | 201 124 | Sri Lanka | 286 125 | Lesotho | 70 126 | Lithuania | 56 127 | Luxembourg | 168 128 | Latvia | 37 129 | Macao | 26277 130 | Morocco | 63 131 | Monaco | 22666 132 | Moldova | 129 133 | Madagascar | 27 134 | Maldives | 959 135 | Mexico | 50 136 | Marshall Islands | 353 137 | Macedonia | 78 138 | Mali | 9 139 | Malta | 1203 140 | Myanmar | 67 141 | Mongolia | 1 142 | Northern Mariana Islands | 168 143 | Mozambique | 24 144 | Mauritania | 2 145 | Montserrat | 107 146 | Martinique | 358 147 | Mauritius | 567 148 | Malawi | 92 149 | Malaysia | 67 150 | Mayotte | 399 151 | Namibia | 2 152 | New Caledonia | 11 153 | Niger | 8 154 | Norfolk Island | 55 155 | Nigeria | 120 156 | Nicaragua | 39 157 | Niue | 7 158 | Netherlands | 382 159 | Norway | 13 160 | Nepal | 162 161 | Nauru | 571 162 | New Zealand | 14 163 | Oman | 8 164 | Pakistan | 196 165 | Panama | 37 166 | Pitcairn | 1 167 | Peru | 19 168 | Philippines | 253 169 | Palau | 41 170 | Papua New Guinea | 10 171 | Poland | 119 172 | Puerto Rico | 435 173 | North Korea | 199 174 | Portugal | 108 175 | Paraguay | 13 176 | Palestine | 495 177 | French Polynesia | 58 178 | Qatar | 54 179 | Réunion | 278 180 | Romania | 94 181 | Russian Federation | 8 182 | Rwanda | 293 183 | Saudi Arabia | 10 184 | Sudan | 11 185 | Senegal | 48 186 | Singapore | 5771 187 | South Georgia and the 
South Sandwich Islands | 0 188 | Saint Helena | 19 189 | Svalbard and Jan Mayen | 0 190 | Solomon Islands | 15 191 | Sierra Leone | 67 192 | El Salvador | 298 193 | San Marino | 442 194 | Somalia | 15 195 | Saint Pierre and Miquelon | 28 196 | Sao Tome and Principe | 152 197 | Suriname | 2 198 | Slovakia | 110 199 | Slovenia | 98 200 | Sweden | 19 201 | Swaziland | 58 202 | Seychelles | 169 203 | Syria | 87 204 | Turks and Caicos Islands | 39 205 | Chad | 5 206 | Togo | 81 207 | Thailand | 119 208 | Tajikistan | 43 209 | Tokelau | 166 210 | Turkmenistan | 9 211 | East Timor | 59 212 | Tonga | 152 213 | Trinidad and Tobago | 252 214 | Tunisia | 58 215 | Turkey | 85 216 | Tuvalu | 461 217 | Taiwan | 615 218 | Tanzania | 37 219 | Uganda | 90 220 | Ukraine | 83 221 | United States Minor Outlying Islands | 0 222 | Uruguay | 19 223 | United States | 29 224 | Uzbekistan | 54 225 | Holy See (Vatican City State) | 2500 226 | Saint Vincent and the Grenadines | 293 227 | Venezuela | 26 228 | Virgin Islands, British | 139 229 | Virgin Islands, U.S. | 268 230 | Vietnam | 240 231 | Vanuatu | 15 232 | Wallis and Futuna | 75 233 | Samoa | 63 234 | Yemen | 34 235 | Yugoslavia | 104 236 | South Africa | 33 237 | Zambia | 12 238 | Zimbabwe | 29
df_country[df_country.Density == 0]
Code | Name | Continent | Region | SurfaceArea | IndepYear | Population | LifeExpectancy | GNP | GNPOld | LocalName | GovernmentForm | HeadOfState | Capital | Code2 | Density | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
11 | ATA | Antarctica | Antarctica | Antarctica | 13120000.0 | NaN | 0 | NaN | 0.0 | NaN | – | Co-administrated | NaN | NaN | AQ | 0 |
12 | ATF | French Southern territories | Antarctica | Antarctica | 7780.0 | NaN | 0 | NaN | 0.0 | NaN | Terres australes françaises | Nonmetropolitan Territory of France | Jacques Chirac | NaN | TF | 0 |
34 | BVT | Bouvet Island | Antarctica | Antarctica | 59.0 | NaN | 0 | NaN | 0.0 | NaN | Bouvetøya | Dependent Territory of Norway | Harald V | NaN | BV | 0 |
71 | FLK | Falkland Islands | South America | South America | 12173.0 | NaN | 2000 | NaN | 0.0 | NaN | Falkland Islands | Dependent Territory of the UK | Elisabeth II | 763.0 | FK | 0 |
87 | GRL | Greenland | North America | North America | 2166090.0 | NaN | 56000 | 68.1 | 0.0 | NaN | Kalaallit Nunaat/Grønland | Part of Denmark | Margrethe II | 917.0 | GL | 0 |
93 | HMD | Heard Island and McDonald Islands | Antarctica | Antarctica | 359.0 | NaN | 0 | NaN | 0.0 | NaN | Heard and McDonald Islands | Territory of Australia | Elisabeth II | NaN | HM | 0 |
100 | IOT | British Indian Ocean Territory | Africa | Eastern Africa | 78.0 | NaN | 0 | NaN | 0.0 | NaN | British Indian Ocean Territory | Dependent Territory of the UK | Elisabeth II | NaN | IO | 0 |
187 | SGS | South Georgia and the South Sandwich Islands | Antarctica | Antarctica | 3903.0 | NaN | 0 | NaN | 0.0 | NaN | South Georgia and the South Sandwich Islands | Dependent Territory of the UK | Elisabeth II | NaN | GS | 0 |
189 | SJM | Svalbard and Jan Mayen | Europe | Nordic Countries | 62422.0 | NaN | 3200 | NaN | 0.0 | NaN | Svalbard og Jan Mayen | Dependent Territory of Norway | Harald V | 938.0 | SJ | 0 |
221 | UMI | United States Minor Outlying Islands | Oceania | Micronesia/Caribbean | 16.0 | NaN | 0 | NaN | 0.0 | NaN | United States Minor Outlying Islands | Dependent Territory of the US | George W. Bush | NaN | UM | 0 |